This is the supplementary material to an invited commentary for Basole et al. (2021). We provide all code that are used to generate the figures in the commentary in addition to other supplementary figures (and its code). To see the code, click on the CODE button. You can also download the whole R Markdown file from the drop down menu on the top right corner.
List of figures
ggplot2.library(tidyverse)
library(ggtext)
library(patchwork)
library(readxl)
library(nullabor)
library(here)
library(janitor)
library(scales)
#theme_set(theme_classic())
knitr::opts_chunk$set(fig.path = "images/",
dev = c("png", "pdf", "svg"),
cache = TRUE,
cache.path = "cache/")
df_full <- read_xlsx(here("data/MaskedCoverage-Fig3.xlsx")) %>%
clean_names() %>%
add_row(state = c("OR", "WY", "SD", "WV", "DC", "AL")) %>%
mutate(row = case_when(
state %in% c("ME") ~ 1L,
state %in% c("VT", "NH") ~ 2L,
state %in% c("WA", "ID", "MT", "ND", "MN", "IL", "WI", "MI", "NY", "RI", "MA") ~ 3L,
state %in% c("OR", "NV", "WY", "SD", "IA", "IN", "OH", "PA", "NJ", "CT") ~ 4L,
state %in% c("CA", "UT", "CO", "NE", "MO", "KY", "WV", "VA", "MD", "DE") ~ 5L,
state %in% c("AZ", "NM", "KS", "AR", "TN", "NC", "SC", "DC") ~ 6L,
state %in% c("OK", "LA", "MS", "AL", "GA") ~ 7L,
state %in% c("TX", "FL") ~ 8L,
TRUE ~ 0L),
col = case_when(
state %in% c("WA", "OR", "CA") ~ 1L,
state %in% c("ID", "NV", "UT", "AZ") ~ 2L,
state %in% c("MT", "WY", "CO", "NM") ~ 3L,
state %in% c("ND", "SD", "NE", "KS", "OK", "TX") ~ 4L,
state %in% c("MN", "IA", "MO", "AR", "LA") ~ 5L,
state %in% c("IL", "IN", "KY", "TN", "MS") ~ 6L,
state %in% c("WI", "OH", "WV", "NC", "AL") ~ 7L,
state %in% c("MI", "PA", "VA", "SC", "GA") ~ 8L,
state %in% c("NY", "NJ", "MD", "DC", "FL") ~ 9L,
state %in% c("VT", "RI", "CT", "DE") ~ 10L,
state %in% c("ME", "NH", "MA") ~ 11L,
TRUE ~ 0L
))
df_miss <- df_full %>%
filter(!is.na(readmission_rate))
g1 <- ggplot(df_miss, aes(col, row)) +
geom_point(aes(size = coverage_obscured, color = readmission_rate * 100), alpha = 0.8) +
geom_text(data = df_full, aes(label = state), color = "black", nudge_y = 0.05) +
geom_text(aes(label = percent(readmission_rate, 0.01)), nudge_y = -0.1, size = 2.5) +
theme_void() +
scale_color_gradient2(low = "#3F6E9A", high = "#AB4C30", midpoint = median(df_miss$readmission_rate * 100), mid = "#E7D9C6") +
scale_size(range = c(3, 30)) +
scale_y_reverse() +
theme(plot.margin = margin(r = 30)) +
labs(color = "Readmission",
size = "Coverage")
g2 <- ggplot(df_miss, aes(col, row)) +
geom_point(aes(size = coverage_obscured, color = colorectal_cancer_screenings), alpha = 0.8) +
geom_text(data = df_full, aes(label = state), color = "black", nudge_y = 0.05) +
geom_text(aes(label = percent(colorectal_cancer_screenings/100, 0.01)), nudge_y = -0.1, size = 2.5) +
theme_void() +
scale_color_gradient2(low = "#3F6E9A", high = "#AB4C30", midpoint = median(df_miss$colorectal_cancer_screenings), mid = "#E7D9C6") +
scale_size(range = c(3, 30)) +
scale_y_reverse() +
labs(color = "Cancer Screening",
size = "Coverage")
g1 + g2 + plot_layout(guides = "collect")
Figure S1: This figure recreates Figure 3 in Basole et al. (2021).
theme_set(theme_classic())
g1 <- ggplot(df_miss, aes(coverage_obscured * 100, colorectal_cancer_screenings)) +
geom_point() +
labs(x = "Coverage (%)", y = "Cancer Screening (%)") +
geom_smooth(method = loess, formula = y ~ x) +
annotate("richtext", x = 80, y = 73, label.color = NA, fill = "transparent", label = glue::glue("R<sup>2</sup> = {scales::comma(cor(df_miss$coverage_obscured, df_miss$colorectal_cancer_screenings), 0.001)}"))
g2 <- ggplot(df_miss, aes(coverage_obscured * 100, readmission_rate * 100)) +
geom_point() +
labs(x = "Coverage (%)", y = "Readmission (%)") +
geom_smooth(method = loess, formula = y ~ x) +
annotate("richtext", x = 80, y = 15.3, label.color = NA, fill = "transparent", label = glue::glue("R<sup>2</sup> = {scales::comma(cor(df_miss$coverage_obscured, df_miss$readmission_rate), 0.001)}"))
g1 + g2
Figure S2: This is an alternative graph design for Figure S1.
set.seed(2021)
lineup_data <- null_permute("colorectal_cancer_screenings") %>%
lineup(true = df_miss, n = 20, pos = 3)
plot_lineup_theirs <- ggplot(lineup_data, aes(col, row)) +
geom_point(aes(size = coverage_obscured, color = colorectal_cancer_screenings), alpha = 0.8) +
theme_void() +
scale_color_gradient2(low = "#3F6E9A", high = "#AB4C30", midpoint = median(df_miss$colorectal_cancer_screenings), mid = "#E7D9C6") +
scale_size(range = c(1, 5)) +
scale_y_reverse(expand = c(0.1, 0.2)) +
guides(color = "none", size = "none") +
facet_wrap(~.sample, ncol = 5) +
scale_x_continuous(expand = c(0.1, 0.1)) +
theme(legend.position = "bottom",
strip.text = element_text(size = 18, margin = margin(t = 3, b = 3)),
strip.background = element_rect(color = "black", size = 1.5))
plot_lineup_theirs
Figure S3: The lineup for the tile grid plot.
plot_lineup_ours <- ggplot(lineup_data, aes(coverage_obscured * 100, colorectal_cancer_screenings)) +
geom_point() +
geom_smooth(method = loess, formula = y ~ x) +
facet_wrap(~.sample, ncol = 5) +
scale_x_continuous(expand = c(0.1, 0.1)) +
theme(legend.position = "bottom",
strip.text = element_text(size = 18, margin = margin(t = 3, b = 3)),
strip.background = element_rect(color = "black", size = 1.5),
axis.text = element_blank(),
axis.title = element_blank(),
axis.line = element_blank(),
axis.ticks.length = unit(0, "pt"))
plot_lineup_ours
Figure S4: The lineup for the scatter plot.
The following are plots based on data that purposely modifies cancer screening to induce a higher association with the coverage. This higher association is induced (as shown in the code below) by rearranging data by the coverage and modifying the cancer screening percentage so that it is ordered from low to high.
df_false <- df_miss %>%
arrange(coverage_obscured) %>%
mutate(colorectal_cancer_screenings = sort(colorectal_cancer_screenings))
lineup_false_data <- null_permute("colorectal_cancer_screenings") %>%
lineup(true = df_false, n = 20, pos = 5)
plot_lineup_theirs %+% lineup_false_data
Figure S5: Which plot looks the most strikingly different to you?
plot_lineup_ours %+% lineup_false_data
Figure S6: The above shows a lineup for data that was purposely manipulated so that two variables have a higher association. How easy was it to spot the data plot compared to Figure S5?
We thank Basole et al. (2021) for supplying us the synthetic data to draw the above plots.
sessioninfo::session_info()
## ─ Session info ────────────────────────────────────────────────────────────────────
## setting value
## version R version 4.0.1 (2020-06-06)
## os macOS 10.16
## system x86_64, darwin17.0
## ui RStudio
## language (EN)
## collate en_AU.UTF-8
## ctype en_AU.UTF-8
## tz Australia/Melbourne
## date 2021-09-19
##
## ─ Packages ────────────────────────────────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.0.0)
## backports 1.2.1 2020-12-09 [1] CRAN (R 4.0.2)
## bookdown 0.22.17 2021-08-07 [1] Github (rstudio/bookdown@9615b14)
## broom 0.7.9 2021-07-27 [1] CRAN (R 4.0.2)
## bslib 0.2.5 2021-05-12 [1] CRAN (R 4.0.1)
## cellranger 1.1.0 2016-07-27 [2] CRAN (R 4.0.0)
## class 7.3-19 2021-05-03 [2] CRAN (R 4.0.2)
## cli 3.0.1 2021-07-17 [1] CRAN (R 4.0.2)
## cluster 2.1.2 2021-04-17 [2] CRAN (R 4.0.2)
## codetools 0.2-18 2020-11-04 [2] CRAN (R 4.0.1)
## colorspace 2.0-1 2021-05-04 [1] CRAN (R 4.0.2)
## crayon 1.4.1 2021-02-08 [1] CRAN (R 4.0.2)
## DBI 1.1.1 2021-01-15 [1] CRAN (R 4.0.2)
## dbplyr 2.1.1 2021-04-06 [1] CRAN (R 4.0.2)
## DEoptimR 1.0-8 2016-11-19 [2] CRAN (R 4.0.0)
## digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.2)
## diptest 0.76-0 2021-05-04 [2] CRAN (R 4.0.2)
## dplyr * 1.0.7 2021-06-18 [1] CRAN (R 4.0.2)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.0.2)
## evaluate 0.14 2019-05-28 [2] CRAN (R 4.0.0)
## fansi 0.5.0 2021-05-25 [1] CRAN (R 4.0.2)
## farver 2.1.0 2021-02-28 [1] CRAN (R 4.0.2)
## flexmix 2.3-17 2020-10-12 [1] CRAN (R 4.0.2)
## forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.0.2)
## fpc 2.2-9 2020-12-06 [2] CRAN (R 4.0.2)
## fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
## generics 0.1.0 2020-10-31 [2] CRAN (R 4.0.2)
## ggplot2 * 3.3.3 2020-12-30 [1] CRAN (R 4.0.1)
## ggtext * 0.1.1 2020-12-17 [1] CRAN (R 4.0.2)
## glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
## gridtext 0.1.4 2020-12-10 [1] CRAN (R 4.0.2)
## gtable 0.3.0 2019-03-25 [2] CRAN (R 4.0.0)
## haven 2.4.1 2021-04-23 [2] CRAN (R 4.0.2)
## here * 1.0.1 2020-12-13 [2] CRAN (R 4.0.2)
## highr 0.9 2021-04-16 [2] CRAN (R 4.0.2)
## hms 1.1.0 2021-05-17 [1] CRAN (R 4.0.2)
## htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 4.0.2)
## httpuv 1.6.1 2021-05-07 [2] CRAN (R 4.0.2)
## httr 1.4.2 2020-07-20 [1] CRAN (R 4.0.2)
## janitor * 2.1.0 2021-01-05 [2] CRAN (R 4.0.2)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.0.2)
## jsonlite 1.7.2 2020-12-09 [1] CRAN (R 4.0.2)
## kernlab 0.9-29 2019-11-12 [2] CRAN (R 4.0.0)
## knitr 1.33 2021-04-24 [1] CRAN (R 4.0.2)
## labeling 0.4.2 2020-10-20 [1] CRAN (R 4.0.2)
## later 1.2.0 2021-04-23 [1] CRAN (R 4.0.2)
## lattice 0.20-44 2021-05-02 [2] CRAN (R 4.0.2)
## lifecycle 1.0.0 2021-02-15 [1] CRAN (R 4.0.2)
## lubridate 1.7.10 2021-02-26 [1] CRAN (R 4.0.2)
## magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.0.2)
## markdown 1.1 2019-08-07 [2] CRAN (R 4.0.0)
## MASS 7.3-54 2021-05-03 [1] CRAN (R 4.0.2)
## Matrix 1.3-3 2021-05-04 [2] CRAN (R 4.0.2)
## mclust 5.4.7 2020-11-20 [2] CRAN (R 4.0.2)
## mgcv 1.8-35 2021-04-18 [2] CRAN (R 4.0.2)
## mime 0.11 2021-06-23 [1] CRAN (R 4.0.2)
## modelr 0.1.8 2020-05-19 [2] CRAN (R 4.0.0)
## modeltools 0.2-23 2020-03-05 [2] CRAN (R 4.0.0)
## moments 0.14 2015-01-05 [2] CRAN (R 4.0.0)
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.0.0)
## nlme 3.1-152 2021-02-04 [2] CRAN (R 4.0.2)
## nnet 7.3-16 2021-05-03 [2] CRAN (R 4.0.2)
## nullabor * 0.3.9 2020-02-25 [1] CRAN (R 4.0.2)
## patchwork * 1.1.1 2020-12-17 [1] CRAN (R 4.0.2)
## pillar 1.6.2 2021-07-29 [1] CRAN (R 4.0.2)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.0.0)
## prabclus 2.3-2 2020-01-08 [2] CRAN (R 4.0.0)
## promises 1.2.0.1 2021-02-11 [1] CRAN (R 4.0.2)
## prompt 1.0.1 2021-03-12 [1] CRAN (R 4.0.2)
## purrr * 0.3.4 2020-04-17 [2] CRAN (R 4.0.0)
## R6 2.5.1 2021-08-19 [1] CRAN (R 4.0.1)
## Rcpp 1.0.7 2021-07-07 [1] CRAN (R 4.0.2)
## readr * 2.0.1 2021-08-10 [1] CRAN (R 4.0.2)
## readxl * 1.3.1 2019-03-13 [2] CRAN (R 4.0.0)
## reprex 2.0.0 2021-04-02 [1] CRAN (R 4.0.2)
## rlang 0.4.11 2021-04-30 [1] CRAN (R 4.0.2)
## rmarkdown 2.10 2021-08-06 [1] CRAN (R 4.0.1)
## robustbase 0.93-7 2021-01-04 [2] CRAN (R 4.0.2)
## rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.0.2)
## rsconnect 0.8.17 2021-04-09 [1] CRAN (R 4.0.2)
## rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.0.1)
## rvest 1.0.1 2021-07-26 [1] CRAN (R 4.0.2)
## sass 0.4.0 2021-05-12 [1] CRAN (R 4.0.2)
## scales * 1.1.1 2020-05-11 [2] CRAN (R 4.0.0)
## servr 0.22 2021-04-14 [1] CRAN (R 4.0.2)
## sessioninfo 1.1.1 2018-11-05 [2] CRAN (R 4.0.0)
## snakecase 0.11.0 2019-05-25 [2] CRAN (R 4.0.0)
## stringi 1.7.3 2021-07-16 [1] CRAN (R 4.0.2)
## stringr * 1.4.0 2019-02-10 [2] CRAN (R 4.0.0)
## tibble * 3.1.3 2021-07-23 [1] CRAN (R 4.0.2)
## tidyr * 1.1.3 2021-03-03 [1] CRAN (R 4.0.2)
## tidyselect 1.1.1 2021-04-30 [1] CRAN (R 4.0.2)
## tidyverse * 1.3.1 2021-04-15 [1] CRAN (R 4.0.2)
## tzdb 0.1.2 2021-07-20 [1] CRAN (R 4.0.2)
## utf8 1.2.2 2021-07-24 [1] CRAN (R 4.0.2)
## vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.0.2)
## withr 2.4.2 2021-04-18 [1] CRAN (R 4.0.2)
## xaringan 0.20.1 2021-03-25 [1] Github (yihui/xaringan@1cca625)
## xfun 0.24 2021-06-15 [1] CRAN (R 4.0.2)
## xml2 1.3.2 2020-04-23 [2] CRAN (R 4.0.0)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.2)
##
## [1] /Users/etan0038/Library/R/4.0/library
## [2] /Library/Frameworks/R.framework/Versions/4.0/Resources/library